The objectives of this notebook are:
Visualize how well we could remove technical variability associated with the assay (scATAC or multiome). We should see a high degree of intermixing between the 2 assays.
Plot several markers of potential doublets or problematic cells: - scRNAseq doublets determined by 10X Multiome - Scrublet doublet predictions - nCount and nFeatures of scATAC - Chromatin Signature - Projection of the scRNA-seq clusters defined at level 3
The broader objective of level 2 is to eliminate most remaining doublets and poor-quality cells, as we will discuss in future notebooks. These visualizations will allow us to explore this. We will also plot their location in the UMAP obtained at level 1.
library(Seurat)
library(tidyverse)
library(reshape2)
library(ggpubr)
# Paths
path_to_obj <- str_c(
here::here("scATAC-seq/results/R_objects/level_2/"),
params$cell_type,
"/",
params$cell_type,
"_integrated_level_2.rds",
sep = ""
)
path_to_obj_RNA <- str_c(
here::here("scRNA-seq/3-clustering/3-level_3/"),
params$cell_type,
"_clustered_level_3_with_pre_freeze.rds",
sep = ""
)
path_to_doublets <- here::here("scRNA-seq/3-clustering/2-level_2/tmp/doublets_multiome_df_all.rds")
# Functions
source(here::here("scRNA-seq/bin/utils.R"))
# Seurat object
seurat <- readRDS(path_to_obj)
seurat
## An object of class Seurat
## 270784 features across 27497 samples within 1 assay
## Active assay: peaks_macs (270784 features, 267464 variable features)
## 3 dimensional reductions calculated: umap, lsi, harmony
seurat_RNA <- readRDS(path_to_obj_RNA)
seurat_RNA
## An object of class Seurat
## 37378 features across 81653 samples within 1 assay
## Active assay: RNA (37378 features, 0 variable features)
## 3 dimensional reductions calculated: pca, umap, harmony
tonsil_RNA_annotation <- seurat_RNA@meta.data %>%
rownames_to_column(var = "cell_barcode") %>%
dplyr::filter(assay == "multiome") %>%
dplyr::select("cell_barcode", "seurat_clusters")
p1 <- DimPlot(seurat,
pt.size = 0.1)
p2 <- DimPlot(seurat_RNA,
group.by = "seurat_clusters",
pt.size = 0.1, label = T) + NoLegend()
p1 + p2
p_assay <- plot_split_umap(seurat, var = "assay")
p_assay
p_assay <- plot_split_umap(seurat, var = "hospital")
p_assay
p_assay <- plot_split_umap(seurat, var = "age_group")
p_assay
p_assay <- plot_split_umap(seurat, var = "sex")
p_assay
Here, we can see the count of doublets detected by cell-type. Note that for the level1 annotation there are not doublets detected in the Naive and Memory B cell cluster.
multiome_doublets <- readRDS(path_to_doublets)
dfm = melt(table(multiome_doublets$cell_type))
dfm$value = as.numeric(as.character(dfm$value))
ggbarplot(dfm, x = "Var1", y = "value",
fill = "Var1",
palette = "jco",
sort.val = "desc",
sort.by.groups = FALSE,
x.text.angle = 90
)
doublets_cells <- colnames(seurat)[which(colnames(seurat) %in% multiome_doublets$barcode)]
length(doublets_cells)
## [1] 1571
DimPlot(
seurat, reduction = "umap",
cols.highlight = "darkred", cols= "grey",
cells.highlight= doublets_cells,
pt.size = 0.1
)
## Scrublet prediction
# Scrublet
DimPlot(seurat, group.by = "scrublet_predicted_doublet_atac")
table(seurat$scrublet_predicted_doublet_atac)
##
## FALSE TRUE
## 24936 2561
qc_vars <- c(
"nCount_peaks",
"nFeature_peaks"
)
qc_gg <- purrr::map(qc_vars, function(x) {
p <- FeaturePlot(seurat, features = x)
p
})
qc_gg
## [[1]]
##
## [[2]]
qc_vars <- c(
"annotation_prob")
qc_gg <- purrr::map(qc_vars, function(x) {
p <- FeaturePlot(seurat, features = x)
p
})
qc_gg
## [[1]]
qc_vars <- c("NBC.MBC", "GCBC", "PC", "CD4.T", "Cytotoxic",
"myeloid", "FDC", "PDC")
qc_gg <- purrr::map(qc_vars, function(x) {
p <- FeaturePlot(seurat, feature = x, max.cutoff = 4, min.cutoff = -4) + scale_color_viridis_c(option = "magma")
p
})
qc_gg
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
Annotation level 3 for scATAC will be defined “a priori” as unannotated and the scRNA annotation will be transfered to the scATAC-multiome cells based on the same cell barcode.
cell_testing <- c("NBC_MBC", "GCBC", "PC", "CD4_T","Cytotoxic")
if (unique(seurat$annotation_level_1) %in% cell_testing){
seurat_df <- data.frame(cell_barcode = colnames(seurat)[seurat$assay == "scATAC"])
seurat_df$seurat_clusters <- "unannotated"
df_all <- rbind(tonsil_RNA_annotation,seurat_df)
rownames(df_all) <- df_all$cell_barcode
df_all <- df_all[colnames(seurat), ]
seurat$seurat_clusters <- df_all$seurat_clusters
seurat@meta.data$annotation_prob <- 1
melt(table(seurat$seurat_clusters))
table(is.na(seurat$seurat_clusters))
p3 <- DimPlot(seurat,
group.by = "seurat_clusters",
pt.size = 0.5)
p3
}
p2
umap_level_1 <- FeatureScatter(
seurat,
"UMAP_1_level_1",
"UMAP_2_level_1",
group.by = "annotation_level_1"
)
umap_level_1 <- umap_level_1 +
theme(
#legend.position = "none",
plot.title = element_blank()
)
umap_level_1
sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: macOS Big Sur 10.16
##
## Matrix products: default
## BLAS/LAPACK: /Users/pauli/opt/anaconda3/envs/Tonsil_atlas/lib/libopenblasp-r0.3.10.dylib
##
## locale:
## [1] es_ES.UTF-8/UTF-8/es_ES.UTF-8/C/es_ES.UTF-8/es_ES.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] Signac_1.1.0.9000 ggpubr_0.4.0 reshape2_1.4.4 forcats_0.5.0 stringr_1.4.0 dplyr_1.0.2 purrr_0.3.4 readr_1.4.0 tidyr_1.1.2 tibble_3.0.4 ggplot2_3.3.2 tidyverse_1.3.0 Seurat_3.9.9.9010 BiocStyle_2.16.1
##
## loaded via a namespace (and not attached):
## [1] rappdirs_0.3.1 SnowballC_0.7.0 rtracklayer_1.48.0 GGally_2.0.0 bit64_4.0.5 knitr_1.30 irlba_2.3.3 DelayedArray_0.14.0 data.table_1.13.2 rpart_4.1-15 RCurl_1.98-1.2 AnnotationFilter_1.12.0 generics_0.1.0 BiocGenerics_0.34.0 GenomicFeatures_1.40.1 cowplot_1.1.0 RSQLite_2.2.1 RANN_2.6.1 future_1.20.1 bit_4.0.4 spatstat.data_2.1-0 xml2_1.3.2 lubridate_1.7.9 httpuv_1.5.4 ggsci_2.9 SummarizedExperiment_1.18.1 assertthat_0.2.1 xfun_0.18 hms_0.5.3 evaluate_0.14 promises_1.1.1 fansi_0.4.1 progress_1.2.2 dbplyr_1.4.4 readxl_1.3.1 igraph_1.2.6 DBI_1.1.0 htmlwidgets_1.5.2 reshape_0.8.8 stats4_4.0.3 ellipsis_0.3.1 backports_1.2.0
## [43] bookdown_0.21 biomaRt_2.44.4 deldir_0.2-3 vctrs_0.3.4 Biobase_2.48.0 here_1.0.1 ensembldb_2.12.1 ROCR_1.0-11 abind_1.4-5 withr_2.3.0 ggforce_0.3.2 BSgenome_1.56.0 checkmate_2.0.0 sctransform_0.3.1 GenomicAlignments_1.24.0 prettyunits_1.1.1 goftest_1.2-2 cluster_2.1.0 lazyeval_0.2.2 crayon_1.3.4 labeling_0.4.2 pkgconfig_2.0.3 tweenr_1.0.1 GenomeInfoDb_1.24.0 nlme_3.1-150 ProtGenerics_1.20.0 nnet_7.3-14 rlang_0.4.11 globals_0.13.1 lifecycle_0.2.0 miniUI_0.1.1.1 BiocFileCache_1.12.1 modelr_0.1.8 rsvd_1.0.3 dichromat_2.0-0 cellranger_1.1.0 rprojroot_2.0.2 polyclip_1.10-0 matrixStats_0.57.0 lmtest_0.9-38 graph_1.66.0 ggseqlogo_0.1
## [85] Matrix_1.3-2 carData_3.0-4 zoo_1.8-8 reprex_0.3.0 base64enc_0.1-3 ggridges_0.5.2 png_0.1-7 viridisLite_0.3.0 bitops_1.0-6 KernSmooth_2.23-17 Biostrings_2.56.0 blob_1.2.1 parallelly_1.21.0 jpeg_0.1-8.1 rstatix_0.6.0 S4Vectors_0.26.0 ggsignif_0.6.0 scales_1.1.1 memoise_1.1.0 magrittr_1.5 plyr_1.8.6 ica_1.0-2 zlibbioc_1.34.0 compiler_4.0.3 RColorBrewer_1.1-2 fitdistrplus_1.1-1 Rsamtools_2.4.0 cli_2.1.0 XVector_0.28.0 listenv_0.8.0 patchwork_1.1.0 pbapply_1.4-3 ps_1.4.0 htmlTable_2.1.0 Formula_1.2-4 MASS_7.3-53 mgcv_1.8-33 tidyselect_1.1.0 stringi_1.5.3 yaml_2.2.1 askpass_1.1 latticeExtra_0.6-29
## [127] ggrepel_0.8.2 grid_4.0.3 VariantAnnotation_1.34.0 fastmatch_1.1-0 tools_4.0.3 future.apply_1.6.0 parallel_4.0.3 rio_0.5.16 rstudioapi_0.12 lsa_0.73.2 foreign_0.8-80 gridExtra_2.3 farver_2.0.3 Rtsne_0.15 digest_0.6.27 BiocManager_1.30.10 shiny_1.5.0 Rcpp_1.0.5 GenomicRanges_1.40.0 car_3.0-10 broom_0.7.2 later_1.1.0.1 RcppAnnoy_0.0.16 OrganismDbi_1.30.0 httr_1.4.2 AnnotationDbi_1.50.3 ggbio_1.36.0 biovizBase_1.36.0 colorspace_2.0-0 rvest_0.3.6 XML_3.99-0.3 fs_1.5.0 tensor_1.5 reticulate_1.18 IRanges_2.22.1 splines_4.0.3 RBGL_1.64.0 uwot_0.1.8.9001 RcppRoll_0.3.0 spatstat.utils_2.1-0 plotly_4.9.2.1 xtable_1.8-4
## [169] jsonlite_1.7.1 spatstat_1.64-1 R6_2.5.0 Hmisc_4.4-1 pillar_1.4.6 htmltools_0.5.1.1 mime_0.9 glue_1.4.2 fastmap_1.0.1 BiocParallel_1.22.0 codetools_0.2-17 lattice_0.20-41 curl_4.3 leiden_0.3.5 zip_2.1.1 openxlsx_4.2.3 openssl_1.4.3 survival_3.2-7 rmarkdown_2.5 munsell_0.5.0 GenomeInfoDbData_1.2.3 haven_2.3.1 gtable_0.3.0